library(haven)
library(tidyverse)
library(hrbrthemes)
col5 <- c('#00296b', '#02529c', '#5280d0', '#89b1ff', '#c0e4ff')
col3 <- c('#02529c', '#89b1ff', '#c0e4ff')
col2 <- c('#00296b',  '#5280d0')

# Figure 1
rr <- readxl::read_xlsx("ICENES-responserates.xlsx")

rr_data <- rr %>% mutate(n_nocontact=nocontact_fresh+replace_na(nophonenum_fresh,0)) %>% 
                  select(year,
                         n_total=freshsample,
                         n_resp=responses_fresh,
                         n_notsay=Nonresponse2partyQ_fresh,
                         n_nocontact,
                         n_rejects=rejects_fresh,
                         n_unavailable=unavailable_fresh) %>% 
                  mutate(year_no=row_number(),
                         n_sayparty=n_resp-n_notsay,
                         p_rr = n_resp/n_total,
                         p_rr_sayparty = n_sayparty/n_total,
                         p_rej = n_rejects/n_total,
                         p_nocontact = n_nocontact/n_total,
                         p_notsay = n_notsay/n_total,
                         p_unavailable=n_unavailable/n_total,
                         check=p_rr_sayparty+p_rej+p_nocontact+p_notsay+p_unavailable) %>% 
                  select(year,year_no, p_rr_sayparty,p_notsay,p_rej,p_nocontact,p_unavailable) %>% 
                  pivot_longer(!starts_with("y")) %>% 
                  mutate(name=fct_rev(factor(case_when(name=="p_rr_sayparty" ~ "Participate, answer vote question",
                                        name=="p_notsay" ~ "Participate, don't answer vote question",
                                        name=="p_rej" ~ "Refusal",
                                        name=="p_nocontact" ~ "Noncontact",
                                        name=="p_unavailable" ~ "Unable to participate"),
                                     levels=c("Participate, answer vote question","Participate, don't answer vote question",
                                              "Refusal","Noncontact","Unable to participate"))))

rr_data %>% 
    ggplot(aes(x=factor(year),y=value, fill=name)) +
    geom_bar(position="stack",stat="identity",color="white")+
    scale_fill_manual(values=rev(col5))+
    scale_y_continuous(limits=c(0,1),labels=scales::percent,breaks=seq(0,1,by=0.1),sec.axis=dup_axis())+
    scale_x_discrete(labels=as.character(paste0("'",substr(rr$year,3,4))))+
    theme_ipsum() +
    theme(legend.position="bottom",
          panel.grid.minor=element_blank(),
          panel.grid.major=element_blank(),
          plot.margin = unit(c(1,1,1,1), "mm")) +
    guides(fill=guide_legend(nrow=2,byrow=TRUE,reverse=TRUE))+
    labs(title=NULL,
         #title="Response, rejection and no contact rate 1983-2021",
         x=NULL,
         y=NULL,
         fill=NULL)

ggsave("figs/fig1_responseovertime.png",width=8,height=5,bg="white")

sim1<-readRDS("temp/mae_simulation_agecat.Rdata") %>% mutate(var="agecat")
sim2<-readRDS("temp/mae_simulation_uni.Rdata") %>% mutate(var="uni")
sim3<-readRDS("temp/mae_simulation_capital.Rdata") %>% mutate(var="capital")
sim4<-readRDS("temp/mae_simulation_female.Rdata") %>% mutate(var="female")

sim <- rbind(sim1,sim2,sim3,sim4)


####3


w1 <- read_dta("data/age_errors.dta") %>%  select(year, Sample=p1, Population=share1) %>% mutate(var="agecat",group="18-29 year old (age)",year_no=row_number())
w2 <- read_dta("data/educ_errors.dta") %>%  select(year, Sample=p3, Population=share3) %>% mutate(var="uni",group="University (education)",year_no=row_number())
w3 <- read_dta("data/capitalarea_errors.dta") %>%  select(year, Sample=p, Population=share) %>% mutate(var="capital",group="Non-capital area (residence)",year_no=row_number())
w4 <- read_dta("data/female_errors.dta") %>%  select(year, Sample=p, Population=share) %>% mutate(var="female",group="Female (gender)",year_no=row_number())

w = rbind(w1,w2,w3,w4) %>% pivot_longer(c(Sample,Population))

w <- w %>% left_join(sim,by=c("year","var"))

w %>% mutate(name=fct_rev(name)) %>% 
  ggplot(aes(x=year_no,y=value, group=name,color=name,shape=name,fill=name)) +
  facet_wrap(~group) +
  geom_ribbon(aes(ymin = q025, ymax = q975), fill = "grey90",color="gray90") +
  geom_line() +
  geom_line(aes(y=mae_median),color="gray60")+
  geom_point(size=2) +
  scale_color_brewer(palette="Set1")+
  scale_fill_brewer(palette="Set1")+
  scale_shape_manual(values=c(24,22),name=NULL)+
  scale_y_continuous(limits=c(0,0.55),labels=scales::percent,breaks=seq(0,1,by=0.1))+
  scale_x_continuous(limits=c(1,12),breaks=seq(1,12,by=1),labels=as.character(paste0("'",substr(rr$year,3,4))))+
  theme_ipsum() +
  theme(legend.position="bottom",
        panel.grid.minor=element_blank(),
        panel.grid.major.x=element_blank(),
        plot.margin = unit(c(1,1,1,1), "mm")) +
  labs(title=NULL,
       #title="Sample data and population benchmarks 1983-2021",
       x=NULL,
       y=NULL,
       color=NULL,
       fill=NULL)

ggsave("figs/fig2_samplepopbenchmarks.png",width=8,height=6,bg="white")

sim<-readRDS("temp/mae_simulation.Rdata") 

w <- read_dta("data/weights_predictions.dta") %>% 
  mutate(type=factor(weight,levels=c("xswt","A","AMD","E","AMDE","D"),
                     labels=c("Unweighted","Age only","Age, gender, district",
                              "Education only", "Age, gender, district, education","District only")))

wX <- w %>% filter(weight=="xswt") %>% select(year,maeX=mae) %>% mutate(year_no=row_number()) %>% 
        left_join(sim,by=("year"))

wXd <- w %>% left_join(wX, by="year") %>%  mutate(mae_d = mae-maeX)


wXd %>% filter(weight %in% c("xswt")) %>% 
  ggplot(aes(x=year_no,y=100*mae,color=weight)) +
  geom_ribbon(aes(ymin = q025, ymax = q975), fill = "grey90",color="gray90") +
  geom_line(size=1.5) +
  geom_line(aes(y=mae_median),color="gray60")+
  #geom_smooth(method="lm",se=FALSE,color="#FF7F7F") +
  geom_point() +
  scale_color_brewer(palette="Set1")+
  scale_fill_brewer(palette="Set1")+
  scale_y_continuous(limits=c(0,2.1))+
  scale_x_continuous(limits=c(1,12),breaks=seq(1,12,by=1),labels=as.character(paste0("'",substr(wX$year,3,4))))+
  theme_ipsum() +
  theme(legend.position="none",
        panel.grid.minor=element_blank()) +
  labs(title=NULL,
       x=NULL,
       y="Mean absolute error (pp.)",
       color=NULL)

ggsave("figs/fig3_unweightedMAE.png",width=8,height=5,bg="white")

wXd %>% filter(weight %in% c("xswt","AMDE","A","E")) %>% 
  ggplot(aes(x=year_no,y=100*mae, group=type,color=type,shape=type,fill=type)) +
  geom_ribbon(aes(ymin = q025, ymax = q975), fill = "grey90",color="gray90") +
    geom_line() +  
    geom_line(data=subset(wXd,weight=="xswt"),size=1.5) +
    geom_point(size=2) +
    scale_color_brewer(palette="Set1")+
  scale_fill_brewer(palette="Set1")+
    #scale_color_manual(values=col5)+
    #scale_fill_manual(values=col5)+
    scale_y_continuous(limits=c(0,2.5))+
    scale_x_continuous(limits=c(1,12),breaks=seq(1,12,by=1),labels=as.character(paste0("'",substr(wX$year,3,4))))+
  scale_shape_manual(values=c(23,22,24,21),name=NULL)+  
  theme_ipsum() +
    theme(legend.position="bottom",
          panel.grid.minor=element_blank()) +
    labs(title=NULL,
         x=NULL,
         y="Mean absolute error (pp.)",
         color=NULL,
         fill=NULL,
         shape=NULL)

ggsave("figs/fig4_compareMAE.png",width=8,height=6,bg="white")



wXd %>% filter(weight %in% c("xswt","AMD","D")) %>% 
  ggplot(aes(x=year_no,y=100*mae, group=type,color=type,shape=type,fill=type)) +
  geom_ribbon(aes(ymin = q025, ymax = q975), fill = "grey90",color="gray90") +
  geom_line() +  
  geom_line(data=subset(wXd,weight=="xswt"),size=1.5) +
  geom_point(size=2) +
  scale_color_brewer(palette="Set1")+
  scale_fill_brewer(palette="Set1")+
  #scale_color_manual(values=col5)+
  #scale_fill_manual(values=col5)+
  scale_y_continuous(limits=c(0,2.5))+
  scale_x_continuous(limits=c(1,12),breaks=seq(1,12,by=1),labels=as.character(paste0("'",substr(wX$year,3,4))))+
  scale_shape_manual(values=c(23,22,24,21),name=NULL)+  
  theme_ipsum() +
  theme(legend.position="bottom",
        panel.grid.minor=element_blank()) +
  labs(title=NULL,
       x=NULL,
       y="Mean absolute error (pp.)",
       color=NULL,
       fill=NULL,
       shape=NULL)

ggsave("figs/fig4SI_compareMAE_district.png",width=8,height=6,bg="white")
